# Global defaults for every DT::datatable() rendered in this report.
# Option names must match the DataTables reference exactly:
# https://datatables.net/reference/option/
options(
  DT.options = list(
    scrollX = TRUE,
    paging = TRUE, # was misspelled `pagin`, which DataTables does not recognise
    fixedHeader = TRUE,
    searchHighlight = TRUE
  )
)

Introduction

Check out this Kaggle

Business Goals

  1. Predict the number of bookings for resort and city for the next 4 weeks
  2. Determine if we can classify a booking as a resort or city type
  3. Predict ADR (average daily rate) from the other features

Ideas

  1. logistic regression / random forests
  2. fb’s prophet package
  3. elastic net, random forest, regression

Get Data

# Import the raw bookings, tidy the column names and turn text columns into
# factors. Columns are sorted alphabetically and then arranged by type
# (dates, factors, numerics); a column of any other type is dropped by that
# second select().
a <- read_csv('hotel_bookings.csv') %>%
  clean_names() %>%
  mutate(across(where(is.character), factor)) %>%
  select(sort(tidyselect::peek_vars())) %>%
  select(where(is.Date), where(is.factor), where(is.numeric)) %>%
  # keep only bookings that were not canceled
  filter(is_canceled == 0) %>%
  # the flag is constant after the filter, so it carries no information
  select(-is_canceled)

Split Data

# Randomly partition the bookings into training and testing sets using
# rsample's default proportion. The seed is fixed so the same rows land in
# each partition every time the report is knit.
set.seed(123)
split <- rsample::initial_split(a)
train <- rsample::training(split)
test <- rsample::testing(split)

Resources

  1. adr
  2. transient
  3. group rate
  4. Guide to Hotel Management
  5. distribution channels

5 min EDA

# First rows of the training set, followed by a per-column summary.
head(train)
train %>% skimr::skim()
Data summary
Name train
Number of rows 56375
Number of columns 31
_______________________
Column type frequency:
Date 1
factor 13
numeric 17
________________________
Group variables None

Variable type: Date

skim_variable n_missing complete_rate min max median n_unique
reservation_status_date 0 1 2015-07-01 2017-09-14 2016-09-03 804

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
agent 0 1 FALSE 301 9: 13983, NUL: 9281, 240: 6342, 7: 2309
arrival_date_month 0 1 FALSE 12 Aug: 6557, Jul: 5965, May: 5255, Oct: 5176
assigned_room_type 0 1 FALSE 10 A: 30769, D: 14227, E: 4412, F: 2125
company 0 1 FALSE 313 NUL: 52139, 40: 652, 223: 501, 45: 170
country 0 1 FALSE 153 PRT: 15728, GBR: 7254, FRA: 6395, ESP: 4793
customer_type 0 1 FALSE 4 Tra: 39897, Tra: 14023, Con: 2065, Gro: 390
deposit_type 0 1 FALSE 3 No : 56196, Ref: 103, Non: 76
distribution_channel 0 1 FALSE 4 TA/: 43271, Dir: 9061, Cor: 3918, GDS: 125
hotel 0 1 FALSE 2 Cit: 34668, Res: 21707
market_segment 0 1 FALSE 7 Onl: 26891, Off: 11858, Dir: 7990, Gro: 5756
meal 0 1 FALSE 5 BB: 43371, HB: 7049, SC: 5072, Und: 644
reservation_status 0 1 FALSE 1 Che: 56375, Can: 0, No-: 0
reserved_room_type 0 1 FALSE 9 A: 39271, D: 9773, E: 3469, F: 1537

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
adr 0 1 99.96 49.20 0 67.39 92.65 125 508 ▇▅▁▁▁
adults 0 1 1.83 0.51 0 2.00 2.00 2 4 ▁▂▇▁▁
arrival_date_day_of_month 0 1 15.85 8.78 1 8.00 16.00 23 31 ▇▇▇▇▆
arrival_date_week_number 0 1 27.12 13.90 1 16.00 28.00 38 53 ▆▇▇▇▆
arrival_date_year 0 1 2016.15 0.70 2015 2016.00 2016.00 2017 2017 ▃▁▇▁▆
babies 0 1 0.01 0.11 0 0.00 0.00 0 10 ▇▁▁▁▁
booking_changes 0 1 0.29 0.73 0 0.00 0.00 0 21 ▇▁▁▁▁
children 0 1 0.10 0.39 0 0.00 0.00 0 3 ▇▁▁▁▁
days_in_waiting_list 0 1 1.58 14.83 0 0.00 0.00 0 379 ▇▁▁▁▁
is_repeated_guest 0 1 0.04 0.20 0 0.00 0.00 0 1 ▇▁▁▁▁
lead_time 0 1 79.89 91.04 0 9.00 45.00 124 737 ▇▂▁▁▁
previous_bookings_not_canceled 0 1 0.20 1.82 0 0.00 0.00 0 72 ▇▁▁▁▁
previous_cancellations 0 1 0.02 0.28 0 0.00 0.00 0 13 ▇▁▁▁▁
required_car_parking_spaces 0 1 0.10 0.30 0 0.00 0.00 0 8 ▇▁▁▁▁
stays_in_week_nights 0 1 2.46 1.92 0 1.00 2.00 3 50 ▇▁▁▁▁
stays_in_weekend_nights 0 1 0.93 0.99 0 0.00 1.00 2 19 ▇▁▁▁▁
total_of_special_requests 0 1 0.72 0.84 0 0.00 1.00 1 5 ▇▁▁▁▁

clean/encode data

# Assemble a single Date column from the separate year / month-name / day
# columns. match() against month.name turns the month name into its 1-12
# position.
train <- mutate(
  train,
  arrival.date = make_date(
    year = arrival_date_year,
    month = match(arrival_date_month, month.name),
    day = arrival_date_day_of_month
  )
)

# These columns are stored as numbers but are categorical in meaning, so
# convert them to factors. across() replaces the superseded mutate_at().
train <- train %>%
  mutate(
    across(
      c(
        arrival_date_day_of_month,
        arrival_date_week_number,
        arrival_date_year,
        is_repeated_guest
      ),
      factor
    )
  )

# Re-establish the column layout after the changes above: alphabetical order
# first, then grouped by type (dates, factors, numerics).
train <- train %>%
  select(sort(tidyselect::peek_vars())) %>%
  select(where(is.Date), where(is.factor), where(is.numeric))

EDA: time series

Note: not a true time series in that the arrival month is a factor

range

# Report the earliest and latest arrival dates in the training set.
paste(
  'The date range of this dataset is from',
  min(train$arrival.date),
  'to',
  max(train$arrival.date)
)
## [1] "The date range of this dataset is from 2015-07-01 to 2017-08-31"

time series count graph – ungrouped

# Daily number of bookings across both hotels.
# Grouping is by arrival date only: this is the ungrouped view, and no colour
# is mapped, so a per-hotel split would just plot two unlabeled points per day.
# Bookings are counted with n(); the previous sum(adults, children) measured
# guest headcount, which does not match the `total.bookings` label.
train %>%
  group_by(arrival.date) %>%
  summarise(total.bookings = n()) %>%
  arrange(arrival.date) %>%
  plot_ly(
    x = ~arrival.date,
    y = ~total.bookings
  ) %>%
  layout(
    title = 'total.bookings by date',
    xaxis = list(title = ''),
    yaxis = list(title = '')
  )
## Warning: `arrange_()` is deprecated as of dplyr 0.7.0.
## Please use `arrange()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.

time series count graph – grouped (hotel)

# Daily booking counts, one colour per hotel.
train %>%
  group_by(arrival.date, hotel) %>%
  summarise(count = n()) %>%
  arrange(arrival.date) %>%
  plot_ly(
    x = ~arrival.date,
    y = ~count,
    color = ~hotel,
    alpha = 0.7 # was misspelled `alphtrain`, which plotly rejected with a warning
  ) %>%
  layout(
    title = 'total.bookings by date/hotel',
    xaxis = list(title = ''),
    yaxis = list(title = '')
  )
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels

## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
## Warning: 'scatter' objects don't have these attributes: 'alphtrain'
## Valid attributes include:
## 'type', 'visible', 'showlegend', 'legendgroup', 'opacity', 'name', 'uid', 'ids', 'customdata', 'meta', 'selectedpoints', 'hoverinfo', 'hoverlabel', 'stream', 'transforms', 'uirevision', 'x', 'x0', 'dx', 'y', 'y0', 'dy', 'stackgroup', 'orientation', 'groupnorm', 'stackgaps', 'text', 'texttemplate', 'hovertext', 'mode', 'hoveron', 'hovertemplate', 'line', 'connectgaps', 'cliponaxis', 'fill', 'fillcolor', 'marker', 'selected', 'unselected', 'textposition', 'textfont', 'r', 't', 'error_x', 'error_y', 'xcalendar', 'ycalendar', 'xaxis', 'yaxis', 'idssrc', 'customdatasrc', 'metasrc', 'hoverinfosrc', 'xsrc', 'ysrc', 'textsrc', 'texttemplatesrc', 'hovertextsrc', 'hovertemplatesrc', 'textpositionsrc', 'rsrc', 'tsrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'

## Warning: 'scatter' objects don't have these attributes: 'alphtrain'
## Valid attributes include:
## 'type', 'visible', 'showlegend', 'legendgroup', 'opacity', 'name', 'uid', 'ids', 'customdata', 'meta', 'selectedpoints', 'hoverinfo', 'hoverlabel', 'stream', 'transforms', 'uirevision', 'x', 'x0', 'dx', 'y', 'y0', 'dy', 'stackgroup', 'orientation', 'groupnorm', 'stackgaps', 'text', 'texttemplate', 'hovertext', 'mode', 'hoveron', 'hovertemplate', 'line', 'connectgaps', 'cliponaxis', 'fill', 'fillcolor', 'marker', 'selected', 'unselected', 'textposition', 'textfont', 'r', 't', 'error_x', 'error_y', 'xcalendar', 'ycalendar', 'xaxis', 'yaxis', 'idssrc', 'customdatasrc', 'metasrc', 'hoverinfosrc', 'xsrc', 'ysrc', 'textsrc', 'texttemplatesrc', 'hovertextsrc', 'hovertemplatesrc', 'textpositionsrc', 'rsrc', 'tsrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'

time series count graph – grouped (customer_type)

# Daily booking counts, one colour per customer type.
train %>%
  group_by(arrival.date, customer_type) %>%
  summarise(count = n()) %>%
  arrange(arrival.date) %>%
  plot_ly(
    x = ~arrival.date,
    y = ~count,
    color = ~customer_type,
    alpha = 0.7 # was misspelled `alphtrain`, which plotly rejected with a warning
  ) %>%
  layout(
    title = 'total.bookings by date/customer_type',
    xaxis = list(title = ''),
    yaxis = list(title = '')
  )
## Warning: 'scatter' objects don't have these attributes: 'alphtrain'
## Valid attributes include:
## 'type', 'visible', 'showlegend', 'legendgroup', 'opacity', 'name', 'uid', 'ids', 'customdata', 'meta', 'selectedpoints', 'hoverinfo', 'hoverlabel', 'stream', 'transforms', 'uirevision', 'x', 'x0', 'dx', 'y', 'y0', 'dy', 'stackgroup', 'orientation', 'groupnorm', 'stackgaps', 'text', 'texttemplate', 'hovertext', 'mode', 'hoveron', 'hovertemplate', 'line', 'connectgaps', 'cliponaxis', 'fill', 'fillcolor', 'marker', 'selected', 'unselected', 'textposition', 'textfont', 'r', 't', 'error_x', 'error_y', 'xcalendar', 'ycalendar', 'xaxis', 'yaxis', 'idssrc', 'customdatasrc', 'metasrc', 'hoverinfosrc', 'xsrc', 'ysrc', 'textsrc', 'texttemplatesrc', 'hovertextsrc', 'hovertemplatesrc', 'textpositionsrc', 'rsrc', 'tsrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'

## Warning: 'scatter' objects don't have these attributes: 'alphtrain'
## Valid attributes include:
## 'type', 'visible', 'showlegend', 'legendgroup', 'opacity', 'name', 'uid', 'ids', 'customdata', 'meta', 'selectedpoints', 'hoverinfo', 'hoverlabel', 'stream', 'transforms', 'uirevision', 'x', 'x0', 'dx', 'y', 'y0', 'dy', 'stackgroup', 'orientation', 'groupnorm', 'stackgaps', 'text', 'texttemplate', 'hovertext', 'mode', 'hoveron', 'hovertemplate', 'line', 'connectgaps', 'cliponaxis', 'fill', 'fillcolor', 'marker', 'selected', 'unselected', 'textposition', 'textfont', 'r', 't', 'error_x', 'error_y', 'xcalendar', 'ycalendar', 'xaxis', 'yaxis', 'idssrc', 'customdatasrc', 'metasrc', 'hoverinfosrc', 'xsrc', 'ysrc', 'textsrc', 'texttemplatesrc', 'hovertextsrc', 'hovertemplatesrc', 'textpositionsrc', 'rsrc', 'tsrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'

## Warning: 'scatter' objects don't have these attributes: 'alphtrain'
## Valid attributes include:
## 'type', 'visible', 'showlegend', 'legendgroup', 'opacity', 'name', 'uid', 'ids', 'customdata', 'meta', 'selectedpoints', 'hoverinfo', 'hoverlabel', 'stream', 'transforms', 'uirevision', 'x', 'x0', 'dx', 'y', 'y0', 'dy', 'stackgroup', 'orientation', 'groupnorm', 'stackgaps', 'text', 'texttemplate', 'hovertext', 'mode', 'hoveron', 'hovertemplate', 'line', 'connectgaps', 'cliponaxis', 'fill', 'fillcolor', 'marker', 'selected', 'unselected', 'textposition', 'textfont', 'r', 't', 'error_x', 'error_y', 'xcalendar', 'ycalendar', 'xaxis', 'yaxis', 'idssrc', 'customdatasrc', 'metasrc', 'hoverinfosrc', 'xsrc', 'ysrc', 'textsrc', 'texttemplatesrc', 'hovertextsrc', 'hovertemplatesrc', 'textpositionsrc', 'rsrc', 'tsrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'

## Warning: 'scatter' objects don't have these attributes: 'alphtrain'
## Valid attributes include:
## 'type', 'visible', 'showlegend', 'legendgroup', 'opacity', 'name', 'uid', 'ids', 'customdata', 'meta', 'selectedpoints', 'hoverinfo', 'hoverlabel', 'stream', 'transforms', 'uirevision', 'x', 'x0', 'dx', 'y', 'y0', 'dy', 'stackgroup', 'orientation', 'groupnorm', 'stackgaps', 'text', 'texttemplate', 'hovertext', 'mode', 'hoveron', 'hovertemplate', 'line', 'connectgaps', 'cliponaxis', 'fill', 'fillcolor', 'marker', 'selected', 'unselected', 'textposition', 'textfont', 'r', 't', 'error_x', 'error_y', 'xcalendar', 'ycalendar', 'xaxis', 'yaxis', 'idssrc', 'customdatasrc', 'metasrc', 'hoverinfosrc', 'xsrc', 'ysrc', 'textsrc', 'texttemplatesrc', 'hovertextsrc', 'hovertemplatesrc', 'textpositionsrc', 'rsrc', 'tsrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'

time series count graph – grouped (deposit_type)

# Daily booking counts, one colour per deposit type.
train %>%
  group_by(arrival.date, deposit_type) %>%
  summarise(count = n()) %>%
  arrange(arrival.date) %>%
  plot_ly(
    x = ~arrival.date,
    y = ~count,
    color = ~deposit_type,
    alpha = 0.7 # was misspelled `alphtrain`, which plotly rejected with a warning
  ) %>%
  layout(
    title = 'total.bookings by date/deposit_type',
    xaxis = list(title = ''),
    yaxis = list(title = '')
  )
## Warning: 'scatter' objects don't have these attributes: 'alphtrain'
## Valid attributes include:
## 'type', 'visible', 'showlegend', 'legendgroup', 'opacity', 'name', 'uid', 'ids', 'customdata', 'meta', 'selectedpoints', 'hoverinfo', 'hoverlabel', 'stream', 'transforms', 'uirevision', 'x', 'x0', 'dx', 'y', 'y0', 'dy', 'stackgroup', 'orientation', 'groupnorm', 'stackgaps', 'text', 'texttemplate', 'hovertext', 'mode', 'hoveron', 'hovertemplate', 'line', 'connectgaps', 'cliponaxis', 'fill', 'fillcolor', 'marker', 'selected', 'unselected', 'textposition', 'textfont', 'r', 't', 'error_x', 'error_y', 'xcalendar', 'ycalendar', 'xaxis', 'yaxis', 'idssrc', 'customdatasrc', 'metasrc', 'hoverinfosrc', 'xsrc', 'ysrc', 'textsrc', 'texttemplatesrc', 'hovertextsrc', 'hovertemplatesrc', 'textpositionsrc', 'rsrc', 'tsrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'

## Warning: 'scatter' objects don't have these attributes: 'alphtrain'
## Valid attributes include:
## 'type', 'visible', 'showlegend', 'legendgroup', 'opacity', 'name', 'uid', 'ids', 'customdata', 'meta', 'selectedpoints', 'hoverinfo', 'hoverlabel', 'stream', 'transforms', 'uirevision', 'x', 'x0', 'dx', 'y', 'y0', 'dy', 'stackgroup', 'orientation', 'groupnorm', 'stackgaps', 'text', 'texttemplate', 'hovertext', 'mode', 'hoveron', 'hovertemplate', 'line', 'connectgaps', 'cliponaxis', 'fill', 'fillcolor', 'marker', 'selected', 'unselected', 'textposition', 'textfont', 'r', 't', 'error_x', 'error_y', 'xcalendar', 'ycalendar', 'xaxis', 'yaxis', 'idssrc', 'customdatasrc', 'metasrc', 'hoverinfosrc', 'xsrc', 'ysrc', 'textsrc', 'texttemplatesrc', 'hovertextsrc', 'hovertemplatesrc', 'textpositionsrc', 'rsrc', 'tsrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'

## Warning: 'scatter' objects don't have these attributes: 'alphtrain'
## Valid attributes include:
## 'type', 'visible', 'showlegend', 'legendgroup', 'opacity', 'name', 'uid', 'ids', 'customdata', 'meta', 'selectedpoints', 'hoverinfo', 'hoverlabel', 'stream', 'transforms', 'uirevision', 'x', 'x0', 'dx', 'y', 'y0', 'dy', 'stackgroup', 'orientation', 'groupnorm', 'stackgaps', 'text', 'texttemplate', 'hovertext', 'mode', 'hoveron', 'hovertemplate', 'line', 'connectgaps', 'cliponaxis', 'fill', 'fillcolor', 'marker', 'selected', 'unselected', 'textposition', 'textfont', 'r', 't', 'error_x', 'error_y', 'xcalendar', 'ycalendar', 'xaxis', 'yaxis', 'idssrc', 'customdatasrc', 'metasrc', 'hoverinfosrc', 'xsrc', 'ysrc', 'textsrc', 'texttemplatesrc', 'hovertextsrc', 'hovertemplatesrc', 'textpositionsrc', 'rsrc', 'tsrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'

time series count graph – grouped (distribution_channel)

# Daily booking counts, one colour per distribution channel.
train %>%
  group_by(arrival.date, distribution_channel) %>%
  summarise(count = n()) %>%
  arrange(arrival.date) %>%
  plot_ly(
    x = ~arrival.date,
    y = ~count,
    color = ~distribution_channel,
    alpha = 0.7 # was misspelled `alphtrain`, which plotly rejected with a warning
  ) %>%
  layout(
    title = 'total.bookings by date/distribution_channel',
    xaxis = list(title = ''),
    yaxis = list(title = '')
  )
## Warning: 'scatter' objects don't have these attributes: 'alphtrain'
## Valid attributes include:
## 'type', 'visible', 'showlegend', 'legendgroup', 'opacity', 'name', 'uid', 'ids', 'customdata', 'meta', 'selectedpoints', 'hoverinfo', 'hoverlabel', 'stream', 'transforms', 'uirevision', 'x', 'x0', 'dx', 'y', 'y0', 'dy', 'stackgroup', 'orientation', 'groupnorm', 'stackgaps', 'text', 'texttemplate', 'hovertext', 'mode', 'hoveron', 'hovertemplate', 'line', 'connectgaps', 'cliponaxis', 'fill', 'fillcolor', 'marker', 'selected', 'unselected', 'textposition', 'textfont', 'r', 't', 'error_x', 'error_y', 'xcalendar', 'ycalendar', 'xaxis', 'yaxis', 'idssrc', 'customdatasrc', 'metasrc', 'hoverinfosrc', 'xsrc', 'ysrc', 'textsrc', 'texttemplatesrc', 'hovertextsrc', 'hovertemplatesrc', 'textpositionsrc', 'rsrc', 'tsrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'

## Warning: 'scatter' objects don't have these attributes: 'alphtrain'
## Valid attributes include:
## 'type', 'visible', 'showlegend', 'legendgroup', 'opacity', 'name', 'uid', 'ids', 'customdata', 'meta', 'selectedpoints', 'hoverinfo', 'hoverlabel', 'stream', 'transforms', 'uirevision', 'x', 'x0', 'dx', 'y', 'y0', 'dy', 'stackgroup', 'orientation', 'groupnorm', 'stackgaps', 'text', 'texttemplate', 'hovertext', 'mode', 'hoveron', 'hovertemplate', 'line', 'connectgaps', 'cliponaxis', 'fill', 'fillcolor', 'marker', 'selected', 'unselected', 'textposition', 'textfont', 'r', 't', 'error_x', 'error_y', 'xcalendar', 'ycalendar', 'xaxis', 'yaxis', 'idssrc', 'customdatasrc', 'metasrc', 'hoverinfosrc', 'xsrc', 'ysrc', 'textsrc', 'texttemplatesrc', 'hovertextsrc', 'hovertemplatesrc', 'textpositionsrc', 'rsrc', 'tsrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'

## Warning: 'scatter' objects don't have these attributes: 'alphtrain'
## Valid attributes include:
## 'type', 'visible', 'showlegend', 'legendgroup', 'opacity', 'name', 'uid', 'ids', 'customdata', 'meta', 'selectedpoints', 'hoverinfo', 'hoverlabel', 'stream', 'transforms', 'uirevision', 'x', 'x0', 'dx', 'y', 'y0', 'dy', 'stackgroup', 'orientation', 'groupnorm', 'stackgaps', 'text', 'texttemplate', 'hovertext', 'mode', 'hoveron', 'hovertemplate', 'line', 'connectgaps', 'cliponaxis', 'fill', 'fillcolor', 'marker', 'selected', 'unselected', 'textposition', 'textfont', 'r', 't', 'error_x', 'error_y', 'xcalendar', 'ycalendar', 'xaxis', 'yaxis', 'idssrc', 'customdatasrc', 'metasrc', 'hoverinfosrc', 'xsrc', 'ysrc', 'textsrc', 'texttemplatesrc', 'hovertextsrc', 'hovertemplatesrc', 'textpositionsrc', 'rsrc', 'tsrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'

## Warning: 'scatter' objects don't have these attributes: 'alphtrain'
## Valid attributes include:
## 'type', 'visible', 'showlegend', 'legendgroup', 'opacity', 'name', 'uid', 'ids', 'customdata', 'meta', 'selectedpoints', 'hoverinfo', 'hoverlabel', 'stream', 'transforms', 'uirevision', 'x', 'x0', 'dx', 'y', 'y0', 'dy', 'stackgroup', 'orientation', 'groupnorm', 'stackgaps', 'text', 'texttemplate', 'hovertext', 'mode', 'hoveron', 'hovertemplate', 'line', 'connectgaps', 'cliponaxis', 'fill', 'fillcolor', 'marker', 'selected', 'unselected', 'textposition', 'textfont', 'r', 't', 'error_x', 'error_y', 'xcalendar', 'ycalendar', 'xaxis', 'yaxis', 'idssrc', 'customdatasrc', 'metasrc', 'hoverinfosrc', 'xsrc', 'ysrc', 'textsrc', 'texttemplatesrc', 'hovertextsrc', 'hovertemplatesrc', 'textpositionsrc', 'rsrc', 'tsrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'

EDA: nom vars

sample data

# Show 10 randomly drawn rows, restricted to the factor columns.
train %>%
  select(where(is.factor)) %>%
  slice_sample(n = 10)

glimpse structure

# Compact overview of every factor column.
train %>%
  select(where(is.factor)) %>%
  glimpse()
## Rows: 56,375
## Columns: 17
## $ agent                     <fct> NULL, NULL, 304, 240, 240, 303, 240, 241,...
## $ arrival_date_day_of_month <fct> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ arrival_date_month        <fct> July, July, July, July, July, July, July,...
## $ arrival_date_week_number  <fct> 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 2...
## $ arrival_date_year         <fct> 2015, 2015, 2015, 2015, 2015, 2015, 2015,...
## $ assigned_room_type        <fct> C, C, A, A, A, C, E, G, E, E, E, E, G, F,...
## $ company                   <fct> NULL, NULL, NULL, NULL, NULL, NULL, NULL,...
## $ country                   <fct> PRT, PRT, GBR, GBR, GBR, PRT, USA, ESP, P...
## $ customer_type             <fct> Transient, Transient, Transient, Transien...
## $ deposit_type              <fct> No Deposit, No Deposit, No Deposit, No De...
## $ distribution_channel      <fct> Direct, Direct, Corporate, TA/TO, TA/TO, ...
## $ hotel                     <fct> Resort Hotel, Resort Hotel, Resort Hotel,...
## $ is_repeated_guest         <fct> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ market_segment            <fct> Direct, Direct, Corporate, Online TA, Onl...
## $ meal                      <fct> BB, BB, BB, BB, BB, FB, BB, HB, BB, BB, B...
## $ reservation_status        <fct> Check-Out, Check-Out, Check-Out, Check-Ou...
## $ reserved_room_type        <fct> C, C, A, A, A, C, D, G, E, D, E, A, A, F,...

check missing values

# Missing-value summary for each factor column.
train %>%
  select(where(is.factor)) %>%
  miss_var_summary()

distribution of level counts per factor

# 15-colour palette interpolated from ColorBrewer's 8-colour 'Dark2' set;
# reused by later plots in this report.
jpal <- colorRampPalette(brewer.pal(8, 'Dark2'))(15)

# Bar chart of the number of distinct values observed in each factor column.
train %>%
  select(where(is.factor)) %>%
  map(n_unique) %>%
  as_tibble() %>% # as.tibble() is deprecated since tibble 2.0.0
  pivot_longer(everything()) %>%
  plot_ly(y = ~name, x = ~value, color = ~name, colors = jpal) %>%
  add_bars() %>%
  hide_legend() %>%
  layout(
    title = 'distribution of level counts per factor',
    xaxis = list(title = ''),
    yaxis = list(title = '')
  )
## Warning: `as.tibble()` is deprecated as of tibble 2.0.0.
## Please use `as_tibble()` instead.
## The signature and semantics have changed, see `?as_tibble`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.

reference: names of unique levels

# For reference: the distinct values present in each factor column.
map(select(train, where(is.factor)), unique)
## $agent
##   [1] NULL 304  240  303  241  8    250  5    175  134  156  243  242  115  105 
##  [16] 40   147  184  96   2    127  95   177  6    15   305  67   196  152  142 
##  [31] 171  36   104  261  306  149  26   258  71   146  181  88   143  251  275 
##  [46] 248  208  69   110  256  314  126  281  253  185  330  334  328  326  324 
##  [61] 321  313  38   155  68   335  308  332  387  298  273  315  307  75   201 
##  [76] 183  223  94   3    446  468  9    34   327  139  436  270  339  47   128 
##  [91] 154  114  29   301  245  244  193  1    16   336  135  350  195  352  355 
## [106] 348  10   168  363  384  360  375  66   331  91   64   385  78   393  406 
## [121] 249  405  163  414  333  11   427  431  430  426  438  433  418  441  72  
## [136] 450  434  454  455  368  451  57   180  358  464  411  481  469  165  254 
## [151] 467  510  531  440  337  526  493  502  527  479  410  508  535  302  497 
## [166] 187  429  13   7    27   17   28   14   42   20   19   37   61   22   39  
## [181] 21   24   30   50   52   12   44   31   83   32   63   56   89   159  86  
## [196] 79   132  45   4    82   81   74   92   99   85   87   112  117  106  98  
## [211] 111  119  148  151  138  121  158  167  144  118  153  211  210  129  213 
## [226] 174  220  173  216  232  35   23   58   205  157  133  150  214  290  192 
## [241] 191  267  215  252  247  278  280  285  289  269  295  288  122  294  325 
## [256] 234  341  310  344  77   103  346  359  283  364  370  33   371  25   179 
## [271] 53   227  141  378  391  397  404  299  73   354  444  296  461  390  388 
## [286] 453  425  394  262  459  474  229  475  480  423  484  495  219  476  509 
## [301] 449 
## 334 Levels: 1 10 103 104 105 106 107 11 110 111 112 114 115 117 118 119 ... NULL
## 
## $arrival_date_day_of_month
##  [1] 1  2  3  4  5  6  7  8  9  10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
## [26] 26 27 28 29 30 31
## 31 Levels: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 ... 31
## 
## $arrival_date_month
##  [1] July      August    September October   November  December  January  
##  [8] February  March     April     May       June     
## 12 Levels: April August December February January July June March ... September
## 
## $arrival_date_week_number
##  [1] 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51
## [26] 52 53 1  2  3  4  5  6  7  8  9  10 11 12 13 14 15 16 17 18 19 20 21 22 23
## [51] 24 25 26
## 53 Levels: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 ... 53
## 
## $arrival_date_year
## [1] 2015 2016 2017
## Levels: 2015 2016 2017
## 
## $assigned_room_type
##  [1] C A E G F D B H I K
## Levels: A B C D E F G H I K L P
## 
## $company
##   [1] NULL 110  270  240  154  144  307  268  59   312  318  174  274  113  195 
##  [16] 223  317  281  118  53   286  12   324  342  371  47   331  178  405  337 
##  [31] 94   528  62   51   120  42   82   81   116  530  103  204  112  135  9   
##  [46] 39   16   92   31   61   356  457  501  86   165  291  292  290  43   325 
##  [61] 192  108  34   224  388  269  465  287  297  490  207  169  282  20   437 
##  [76] 263  225  329  272  28   482  200  338  83   72   246  319  159  380  323 
##  [91] 511  407  421  88   278  80   403  399  84   137  343  346  347  289  351 
## [106] 355  54   99   250  358  361  390  362  366  372  365  277  109  14   377 
## [121] 379  22   378  330  364  401  232  384  167  212  514  391  400  376  392 
## [136] 402  396  302  370  367  397  369  409  251  168  428  382  408  413  148 
## [151] 10   333  360  415  422  395  435  442  445  448  443  454  444  394  52  
## [166] 459  458  456  353  254  460  447  470  255  466  184  485  32   491  494 
## [181] 193  516  496  499  308  29   78   146  504  130  520  507  506  498  515 
## [196] 512  126  64   242  477  518  521  523  539  436  525  541  40   455  410 
## [211] 45   38   49   67   68   65   91   8    221  46   76   96   100  115  105 
## [226] 101  93   11   139  142  127  107  140  143  163  149  150  180  238  219 
## [241] 186  179  183  222  153  197  203  185  217  209  215  230  35   216  227 
## [256] 245  218  158  259  260  411  257  271  18   106  275  210  273  71   284 
## [271] 301  233  305  293  264  311  304  313  288  320  334  314  332  341  349 
## [286] 350  73   383  368  393  220  412  420  426  417  243  429  433  446  450 
## [301] 418  424  280  357  483  439  489  229  486  481  497  451  492 
## 353 Levels: 10 100 101 102 103 104 105 106 107 108 109 11 110 112 113 ... NULL
## 
## $country
##   [1] PRT  GBR  USA  ESP  IRL  FRA  NULL ROU  NOR  OMN  ARG  POL  DEU  BEL  CN  
##  [16] CHE  ITA  NLD  DNK  SWE  AUS  EST  CZE  BRA  FIN  MOZ  BWA  LUX  RUS  ALB 
##  [31] IND  CHN  MAR  SVN  UKR  LVA  BLR  LTU  TUR  MEX  AGO  ISR  CHL  CYM  ZMB 
##  [46] AUT  ZWE  DZA  CRI  KOR  HUN  HRV  CYP  NZL  KAZ  THA  COL  DOM  MKD  PRI 
##  [61] MYS  GRC  NGA  VEN  GIB  JPN  LKA  ZAF  CMR  IRN  BIH  MUS  COM  SUR  CUB 
##  [76] BGR  CIV  JOR  SYR  SGP  BDI  KWT  URY  LBN  AZE  ARE  QAT  EGY  PER  SVK 
##  [91] CPV  MDV  SRB  MLT  MWI  ECU  MDG  IDN  ISL  CAF  JAM  UZB  NPL  BHS  PAK 
## [106] TGO  TWN  HKG  DJI  VNM  PHL  GEO  TUN  SEN  SAU  ETH  IRQ  LIE  MMR  PAN 
## [121] TMP  BFA  ARM  KEN  MCO  GNB  LBY  TZA  BGD  NAM  BOL  SYC  PRY  BRB  ABW 
## [136] AIA  SLV  DMA  GAB  PYF  UGA  GUY  LCA  MNE  GTM  GHA  ASM  NCL  STP  KIR 
## [151] TJK  LAO  FRO 
## 178 Levels: ABW AGO AIA ALB AND ARE ARG ARM ASM ATA ATF AUS AUT AZE BDI ... ZWE
## 
## $customer_type
## [1] Transient       Contract        Transient-Party Group          
## Levels: Contract Group Transient Transient-Party
## 
## $deposit_type
## [1] No Deposit Refundable Non Refund
## Levels: No Deposit Non Refund Refundable
## 
## $distribution_channel
## [1] Direct    Corporate TA/TO     GDS      
## Levels: Corporate Direct GDS TA/TO Undefined
## 
## $hotel
## [1] Resort Hotel City Hotel  
## Levels: City Hotel Resort Hotel
## 
## $is_repeated_guest
## [1] 0 1
## Levels: 0 1
## 
## $market_segment
## [1] Direct        Corporate     Online TA     Offline TA/TO Complementary
## [6] Groups        Aviation     
## 8 Levels: Aviation Complementary Corporate Direct Groups ... Undefined
## 
## $meal
## [1] BB        FB        HB        SC        Undefined
## Levels: BB FB HB SC Undefined
## 
## $reservation_status
## [1] Check-Out
## Levels: Canceled Check-Out No-Show
## 
## $reserved_room_type
## [1] C A D G E F H L B
## Levels: A B C D E F G H L P
# Put the month levels in calendar order instead of the alphabetical default;
# month.name is base R's January..December vector.
train <- train %>%
  mutate(arrival_date_month = factor(arrival_date_month, levels = month.name))

EDA: num vars

check missing values

# Missing-value summary for each numeric column.
train %>%
  select(where(is.numeric)) %>%
  miss_var_summary()

sample data

# Show 10 randomly drawn rows, restricted to the numeric columns.
train %>%
  select(where(is.numeric)) %>%
  slice_sample(n = 10)

glimpse structure

# Compact overview of every numeric column.
train %>%
  select(where(is.numeric)) %>%
  glimpse()
## Rows: 56,375
## Columns: 13
## $ adr                            <dbl> 0.00, 0.00, 75.00, 98.00, 98.00, 103...
## $ adults                         <dbl> 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, ...
## $ babies                         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ booking_changes                <dbl> 3, 4, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, ...
## $ children                       <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, ...
## $ days_in_waiting_list           <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ lead_time                      <dbl> 342, 737, 13, 14, 14, 9, 68, 18, 37,...
## $ previous_bookings_not_canceled <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ previous_cancellations         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ required_car_parking_spaces    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ stays_in_week_nights           <dbl> 0, 0, 1, 2, 2, 2, 4, 4, 4, 4, 4, 1, ...
## $ stays_in_weekend_nights        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ total_of_special_requests      <dbl> 0, 0, 0, 1, 1, 1, 3, 1, 0, 3, 0, 0, ...

viz: outliers

# Outlier diagnostic plots for every numeric column.
train %>%
  select(where(is.numeric)) %>%
  dlookr::plot_outlier()

There are many upper outliers. When building a prediction model, perhaps we should consider transforming them or removing them entirely.

check right tailness

# Upper-tail quantiles of a numeric vector: the 90th, 95th and 99th
# percentiles plus the maximum. Returns quantile()'s named numeric vector.
jquantiles <- function(col) {
  upper_probs <- c(0.90, 0.95, 0.99, 1)
  quantile(col, probs = upper_probs)
}

# Table of upper-tail quantiles: one column per numeric variable, one row per
# probability (the probability label ends up in the `rowname` column).
train %>%
  na.omit() %>%
  select(where(is.numeric)) %>%
  map(jquantiles) %>%
  as.data.frame() %>% # dispatches to the list method; no need to call it directly
  rownames_to_column() %>%
  as_tibble() # as.tibble() is deprecated since tibble 2.0.0

viz: normality

# Normality diagnostic plots for every numeric column.
train %>%
  select(where(is.numeric)) %>%
  dlookr::plot_normality()

viz: distribution histogram

# Histograms of every numeric column with all rows included (outliers kept);
# two panels per page.
train %>%
  select(where(is.numeric)) %>%
  DataExplorer::plot_histogram(nrow = 2, ncol = 1)

# Histograms again, after dropping every row that hits one of the hard-coded
# extreme values below.
# NOTE(review): the skim summary of `train` reports maxima of 508 (adr),
# 4 (adults), 3 (children), 379 (days_in_waiting_list), 737 (lead_time) and
# 13 (previous_cancellations), so those six conditions currently remove
# nothing -- confirm the intended thresholds.
train %>%
  select(where(is.numeric)) %>%
  filter(
    adr != 5400 &
      adults != 55 &
      babies != 10 &
      booking_changes != 21 &
      children != 10 &
      days_in_waiting_list != 391 &
      lead_time != 709 &
      previous_bookings_not_canceled != 72 &
      previous_cancellations != 26 &
      required_car_parking_spaces != 8 &
      stays_in_week_nights != 50 &
      stays_in_weekend_nights != 19
  ) %>%
  DataExplorer::plot_histogram(nrow = 2, ncol = 1)

viz: distribution bivariate

# Box plots of every numeric column split by hotel, after dropping every row
# that hits one of the hard-coded extreme values below.
# NOTE(review): the skim summary of `train` reports maxima of 508 (adr),
# 4 (adults), 3 (children), 379 (days_in_waiting_list), 737 (lead_time) and
# 13 (previous_cancellations), so those six conditions currently remove
# nothing -- confirm the intended thresholds.
train %>%
  select(hotel, where(is.numeric)) %>%
  filter(
    adr != 5400 &
      adults != 55 &
      babies != 10 &
      booking_changes != 21 &
      children != 10 &
      days_in_waiting_list != 391 &
      lead_time != 709 &
      previous_bookings_not_canceled != 72 &
      previous_cancellations != 26 &
      required_car_parking_spaces != 8 &
      stays_in_week_nights != 50 &
      stays_in_weekend_nights != 19
  ) %>%
  DataExplorer::plot_boxplot(by = 'hotel', nrow = 3, ncol = 1)

ADR further investigation

# A single extreme adr value appears to stretch the axis and hide the real
# distribution, so start by checking the observed range of adr.
range(a$adr)
## [1]  -6.38 510.00
# Horizontal box plot of adr for each hotel.
# NOTE(review): the printed range of a$adr tops out at 510, so the 5400
# filter appears to remove nothing here -- confirm the intended threshold.
train %>%
  filter(adr != 5400) %>%
  select(hotel, adr) %>%
  plot_ly(
    y = ~hotel,
    x = ~adr,
    color = ~hotel,
    colors = jpal[1:2]
  ) %>%
  add_boxplot()
# ADR per hotel with one box per customer type, drawn side by side.
# Technique: https://stackoverflow.com/questions/57300053/split-a-plotly-boxplot-x-axis-by-group
# The deprecated `group` argument is dropped; the `color` mapping already
# produces one trace per customer type.
train %>%
  filter(adr != 5400) %>%
  select(hotel, adr, customer_type) %>%
  plot_ly(y = ~hotel, x = ~adr, color = ~customer_type, colors = jpal) %>%
  add_boxplot() %>%
  layout(
    boxmode = 'group', # essential: places the per-type boxes next to each other
    title = 'ADR by Hotel/customer_type'
  )
## Warning in plot_ly(., y = ~hotel, x = ~adr, color = ~customer_type, colors = jpal, : The group argument has been deprecated. Use `group_by()` or split instead.
## See `help('plotly_data')` for examples
## Warning: 'layout' objects don't have these attributes: 'boxmode'
## Valid attributes include:
## 'font', 'title', 'uniformtext', 'autosize', 'width', 'height', 'margin', 'paper_bgcolor', 'plot_bgcolor', 'separators', 'hidesources', 'showlegend', 'colorway', 'datarevision', 'uirevision', 'editrevision', 'selectionrevision', 'template', 'modebar', 'meta', 'transition', '_deprecated', 'clickmode', 'dragmode', 'hovermode', 'hoverdistance', 'spikedistance', 'hoverlabel', 'selectdirection', 'grid', 'calendar', 'xaxis', 'yaxis', 'ternary', 'scene', 'geo', 'mapbox', 'polar', 'radialaxis', 'angularaxis', 'direction', 'orientation', 'editType', 'legend', 'annotations', 'shapes', 'images', 'updatemenus', 'sliders', 'colorscale', 'coloraxis', 'metasrc', 'barmode', 'bargap', 'mapType'
#https://stackoverflow.com/questions/57300053/split-a-plotly-boxplot-x-axis-by-group
# Horizontal ADR boxplots per hotel, split into one box per market_segment.
# The deprecated `group` argument is no longer passed: plot_ly() only emitted a
# deprecation warning for it, and `color = ~market_segment` already produces one
# trace per market segment.
train %>% filter(adr != 5400) %>%
  select(hotel, adr, market_segment) %>%
  plot_ly(y = ~hotel, x = ~adr, color = ~market_segment, colors = jpal) %>% 
  add_boxplot() %>% 
  layout(
    boxmode = 'group', #SUPER IMPORTANT: puts the per-segment boxes side by side within each hotel
    title = 'ADR by Hotel/market_segment'
    ) 
## Warning in plot_ly(., y = ~hotel, x = ~adr, color = ~market_segment, colors = jpal, : The group argument has been deprecated. Use `group_by()` or split instead.
## See `help('plotly_data')` for examples

## Warning in plot_ly(., y = ~hotel, x = ~adr, color = ~market_segment, colors = jpal, : 'layout' objects don't have these attributes: 'boxmode'
## Valid attributes include:
## 'font', 'title', 'uniformtext', 'autosize', 'width', 'height', 'margin', 'paper_bgcolor', 'plot_bgcolor', 'separators', 'hidesources', 'showlegend', 'colorway', 'datarevision', 'uirevision', 'editrevision', 'selectionrevision', 'template', 'modebar', 'meta', 'transition', '_deprecated', 'clickmode', 'dragmode', 'hovermode', 'hoverdistance', 'spikedistance', 'hoverlabel', 'selectdirection', 'grid', 'calendar', 'xaxis', 'yaxis', 'ternary', 'scene', 'geo', 'mapbox', 'polar', 'radialaxis', 'angularaxis', 'direction', 'orientation', 'editType', 'legend', 'annotations', 'shapes', 'images', 'updatemenus', 'sliders', 'colorscale', 'coloraxis', 'metasrc', 'barmode', 'bargap', 'mapType'
#https://stackoverflow.com/questions/57300053/split-a-plotly-boxplot-x-axis-by-group
# Vertical ADR boxplots per hotel, split into one box per arrival_date_month.
# The deprecated `group` argument is no longer passed: plot_ly() only emitted a
# deprecation warning for it, and `color = ~arrival_date_month` already produces
# one trace per month.
train %>% filter(adr != 5400) %>%
  select(hotel, adr, arrival_date_month) %>%
  plot_ly(x = ~hotel, y = ~adr, color = ~arrival_date_month, colors = jpal) %>% 
  add_boxplot() %>% 
  layout(
    boxmode = 'group', #SUPER IMPORTANT: puts the per-month boxes side by side within each hotel
    title = 'ADR by Hotel/arrival_date_month'
    ) 
## Warning in plot_ly(., x = ~hotel, y = ~adr, color = ~arrival_date_month, : The group argument has been deprecated. Use `group_by()` or split instead.
## See `help('plotly_data')` for examples

## Warning in plot_ly(., x = ~hotel, y = ~adr, color = ~arrival_date_month, : 'layout' objects don't have these attributes: 'boxmode'
## Valid attributes include:
## 'font', 'title', 'uniformtext', 'autosize', 'width', 'height', 'margin', 'paper_bgcolor', 'plot_bgcolor', 'separators', 'hidesources', 'showlegend', 'colorway', 'datarevision', 'uirevision', 'editrevision', 'selectionrevision', 'template', 'modebar', 'meta', 'transition', '_deprecated', 'clickmode', 'dragmode', 'hovermode', 'hoverdistance', 'spikedistance', 'hoverlabel', 'selectdirection', 'grid', 'calendar', 'xaxis', 'yaxis', 'ternary', 'scene', 'geo', 'mapbox', 'polar', 'radialaxis', 'angularaxis', 'direction', 'orientation', 'editType', 'legend', 'annotations', 'shapes', 'images', 'updatemenus', 'sliders', 'colorscale', 'coloraxis', 'metasrc', 'barmode', 'bargap', 'mapType'
library(DescTools)
## Warning: package 'DescTools' was built under R version 4.0.3
## 
## Attaching package: 'DescTools'
## The following object is masked from 'package:data.table':
## 
##     %like%
# Median ADR per arrival month and hotel, drawn as a dodged bar chart.
# Months are re-levelled with base R's month.name / month.abb so the x axis runs
# Jan..Dec; relabelling the existing factor levels (sorted alphabetically by
# factor()) kept the bars in alphabetical order.
# Assumes arrival_date_month holds full English month names, as the
# match(arrival_date_month, month.name) call elsewhere in this file also does.
ggplotly(
train %>% filter(adr != 5400) %>% 
  mutate(arrival_date_month = factor(arrival_date_month, levels = month.name, labels = month.abb)) %>% 
  group_by(arrival_date_month, hotel) %>% 
  summarise(med.adr = median(adr, na.rm = TRUE), .groups = 'drop') %>% #explicitly ungrouped; no regrouping message
  ggplot(aes(arrival_date_month, med.adr, fill = hotel)) +
  geom_col(position = 'dodge') +
  scale_fill_manual(values = c('blue4','darkorange'))
) %>% layout(
  title = 'Median ADR by Hotel/Month'
)
## `summarise()` regrouping output by 'arrival_date_month' (override with `.groups` argument)

correlations: viz

# correlation plot (dlookr) over the numeric columns of the training split
dlookr::plot_correlate(select(train, where(is.numeric)))

# labelled correlation matrix (GGally) over the same numeric columns
GGally::ggcorr(select(train, where(is.numeric)), palette = "RdBu", label = TRUE)

Goal 1: Predict Number of Bookings by Hotel Type

Anomaly Detection

library(anomalize)
## == Use anomalize to improve your Forec
## Business Science offers a 1-hour course - Lab #18: Time Series Anomaly Detection!
## </> Learn more at: https://university.business-science.io/p/learning-labs-pro </>
# time_decompose(data, target, method = c("stl", "twitter"), frequency = "auto", trend = "auto", ..., merge = FALSE, message = TRUE)
# anomalize(data, target, method = c("iqr", "gesd"), alpha = 0.05, max_anoms = 0.2, verbose = FALSE)
# The alpha parameter adjusts the width of the critical values. By default, alpha = 0.05.
# Lower values are more conservative while higher values are less prone to incorrectly classifying "normal" observations.
# max_anoms: The maximum percent of anomalies permitted to be identified.

# The STL method uses the stl() function from the stats package. STL works very well in circumstances where a long term trend is present (which applies in this case; see the trend component in the prophet graphs below).
  
# start from the full data set (filtering by hotel type and ordering by date
# happen in the steps that consume a1); build a Date column from the separate
# year / month-name / day-of-month columns
a1 <- mutate(
  a,
  arrival.date = make_date(
    arrival_date_year,
    match(arrival_date_month, month.name), # month name -> month number
    arrival_date_day_of_month
  )
)

# Daily series for the resort hotel, STL-decomposed and flagged for anomalies.
# Changes from the previous version:
#   * as_tibble() replaces the deprecated as.tibble()
#   * na.rm = TRUE so one missing adults/children value cannot turn a whole day into NA
#   * .groups = 'drop' returns an ungrouped result (and silences the regrouping message);
#     summarise() already yields exactly arrival.date, hotel, total.bookings, so the
#     extra select() is no longer needed
# NOTE(review): total.bookings is the sum of adults + children per arrival date
# (a guest head count), not a count of booking rows -- confirm this is the
# intended target for "number of bookings"; n() would count bookings instead.
(anomaly.hotel.resort = a1 %>% filter(hotel == 'Resort Hotel') %>% 
  group_by(arrival.date, hotel) %>% 
  summarise(total.bookings = sum(adults, children, na.rm = TRUE), .groups = 'drop') %>% 
  arrange(arrival.date) %>% as_tibble() %>% 
  time_decompose(total.bookings, method = 'stl', merge = TRUE) %>%
  anomalize(remainder, alpha = 0.15, method = 'gesd') %>% #alpha raised from the 0.05 default to increase sensitivity to outliers
  time_recompose())
## `summarise()` regrouping output by 'arrival.date' (override with `.groups` argument)
## Converting from tbl_df to tbl_time.
## Auto-index message: index = arrival.date
## frequency = 7 days
## trend = 91 days
# Daily series for the city hotel, STL-decomposed and flagged for anomalies.
# Changes from the previous version:
#   * as_tibble() replaces the deprecated as.tibble()
#   * na.rm = TRUE so one missing adults/children value cannot turn a whole day into NA
#   * .groups = 'drop' returns an ungrouped result (and silences the regrouping message);
#     summarise() already yields exactly arrival.date, hotel, total.bookings, so the
#     extra select() is no longer needed
# NOTE(review): total.bookings is the sum of adults + children per arrival date
# (a guest head count), not a count of booking rows -- confirm this is the
# intended target for "number of bookings"; n() would count bookings instead.
(anomaly.hotel.city = a1 %>% filter(hotel == 'City Hotel') %>% 
  group_by(arrival.date, hotel) %>% 
  summarise(total.bookings = sum(adults, children, na.rm = TRUE), .groups = 'drop') %>% 
  arrange(arrival.date) %>% as_tibble() %>% 
  time_decompose(total.bookings, method = 'stl', merge = TRUE) %>%
  anomalize(remainder, alpha = 0.15, method = 'gesd') %>% #alpha raised from the 0.05 default to increase sensitivity to outliers
  time_recompose())
## `summarise()` regrouping output by 'arrival.date' (override with `.groups` argument)
## Converting from tbl_df to tbl_time.
## Auto-index message: index = arrival.date
## frequency = 7 days
## trend = 91 days
# Interactive plot of the resort-hotel series with its flagged anomalies and
# the recomposed bounds drawn as a faint ribbon.
(
  plot_anomalies(
    anomaly.hotel.resort,
    time_recomposed = TRUE,
    ncol = 2,
    alpha_dots = 0.5,
    alpha_circles = 0.5,
    size_circles = 2,
    alpha_ribbon = 0.05
  ) +
    scale_y_continuous(labels = comma) +
    labs(x = '', y = 'total.bookings', title = 'resort hotel total.bookings')
) %>%
  ggplotly()
# Interactive plot of the city-hotel series with its flagged anomalies and
# the recomposed bounds drawn as a faint ribbon.
(
  plot_anomalies(
    anomaly.hotel.city,
    time_recomposed = TRUE,
    ncol = 2,
    alpha_dots = 0.5,
    alpha_circles = 0.5,
    size_circles = 2,
    alpha_ribbon = 0.05
  ) +
    scale_y_continuous(labels = comma) +
    labs(x = '', y = 'total.bookings', title = 'city hotel total.bookings')
) %>%
  ggplotly()

Predicting Next 4 Wks Total Bookings

Resort Hotel

library(prophet)
## Loading required package: Rcpp
## 
## Attaching package: 'Rcpp'
## The following object is masked from 'package:rsample':
## 
##     populate
## Loading required package: rlang
## 
## Attaching package: 'rlang'
## The following objects are masked from 'package:purrr':
## 
##     %@%, as_function, flatten, flatten_chr, flatten_dbl, flatten_int,
##     flatten_lgl, flatten_raw, invoke, list_along, modify, prepend,
##     splice
## The following object is masked from 'package:data.table':
## 
##     :=
# prophet's column conventions: the date column becomes `ds`, the value column `y`
prophet.resort.df <- select(anomaly.hotel.resort, ds = arrival.date, y = total.bookings)

# fit the prophet model on the resort series
prophet.resort.mdl <- prophet(prophet.resort.df)
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
# date frame covering the history plus the forecast horizon
prophet.resort.future.df <- make_future_dataframe(
  prophet.resort.mdl,
  periods = 28, #next 4 wks
  freq = 'day',
  include_history = TRUE
)

# forecasts for every date in the future frame
prophet.resort.forecast.df <- predict(prophet.resort.mdl, prophet.resort.future.df)

# preview the first rows of the forecast
DT::datatable(head(prophet.resort.forecast.df))
# interactive forecast plot
dyplot.prophet(prophet.resort.mdl, prophet.resort.forecast.df)
## Warning: `select_()` is deprecated as of dplyr 0.7.0.
## Please use `select()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
# component plots of the resort forecast
prophet_plot_components(prophet.resort.mdl, prophet.resort.forecast.df)

City Hotel

library(prophet)

# prophet's column conventions: the date column becomes `ds`, the value column `y`
prophet.city.df <- select(anomaly.hotel.city, ds = arrival.date, y = total.bookings)

# fit the prophet model on the city series
prophet.city.mdl <- prophet(prophet.city.df)
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
# date frame covering the history plus the forecast horizon
prophet.city.future.df <- make_future_dataframe(
  prophet.city.mdl,
  periods = 28, #next 4 wks
  freq = 'day',
  include_history = TRUE
)

# forecasts for every date in the future frame
prophet.city.forecast.df <- predict(prophet.city.mdl, prophet.city.future.df)

# preview the first rows of the forecast
DT::datatable(head(prophet.city.forecast.df))
# interactive forecast plot
dyplot.prophet(prophet.city.mdl, prophet.city.forecast.df)
# component plots of the city forecast
prophet_plot_components(prophet.city.mdl, prophet.city.forecast.df)